/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				03/17/2018
Date last modified: 		
Purpose: 
 Import all the trade dataset from Peter K Schott
	http://faculty.som.yale.edu/peterschott/sub_international.htm
==========================================================================*/


*************************************************************************
* 				PART I. Raw Data										*
*************************************************************************

* 1. imports **************************************************************
* (1). import data
local name "imp"
if "`name'" == "imp" {
	local var "gen_val_yr"
}
if "`name'" == "exp" {
	local var "all_val_yr"
}
*
local year 89
use "$Schottraw/`name'_detl_yearly_`year'n.dta", clear
keep commodity naics cty_code year `var'
order commodity naics cty_code year `var'
collapse (sum) `var', by(commodity naics cty_code year)

tempfile trade
forvalues year = 90(1)117 {
	preserve
		use "$Schottraw/`name'_detl_yearly_`year'n.dta", clear
		keep commodity naics cty_code  year `var'
		order commodity naics cty_code  year `var'
		collapse (sum) `var', by(commodity naics cty_code year)
		save `trade', replace
	restore
	
	append using `trade.dta'
}
*

* (2) add in country names
preserve
	insheet using "$crosswalks/us_trade_countries.csv", comma clear
	drop if  cty_code == .
	tempfile crosswalk
	save `crosswalk.dta', replace
restore
merge m:1 cty_code using `crosswalk.dta'
drop if _merge == 2
* USA and some US territories are dropped
drop _merge 

order commodity naics year gen_val_yr cty_code
rename gen_val_yr imports

save "$Schott/us_cntry_imports_commodity-89-17.dta", replace

* 2. exports data ************************************************************
* (1) import data
local name "exp"
if "`name'" == "imp" {
	local var "gen_val_yr"
}
if "`name'" == "exp" {
	local var "all_val_yr"
}
*
local year 89
use "$Schottraw/`name'_detl_yearly_`year'n.dta", clear
keep commodity naics cty_code  year `var'
order commodity naics cty_code  year `var'
collapse (sum) `var', by(commodity naics cty_code year)

tempfile trade
forvalues year = 90(1)117 {
	preserve
		use "$Schottraw/`name'_detl_yearly_`year'n.dta", clear
		keep commodity naics cty_code  year `var'
		order commodity naics cty_code  year `var'
		collapse (sum) `var', by(commodity naics cty_code year)
		save `trade', replace
	restore
	
	append using `trade.dta'
}
*

* (2) merge in country names
preserve
	insheet using "$crosswalks/us_trade_countries.csv", comma clear
	drop if  cty_code == .
	tempfile crosswalk
	save `crosswalk.dta', replace
restore
merge m:1 cty_code using `crosswalk.dta'
drop if _merge == 2
* USA and some US territories are dropped
drop _merge 

order commodity naics year all_val_yr cty_code
rename all_val_yr exports
save "$Schott/us_cntry_exports_commodity-89-17.dta", replace



*************************************************************************
* 				PART II. merge imports and exports						*
*************************************************************************

* 1. generate naics-year level dataset *******************************
preserve
	use "$Schott/us_cntry_imports_commodity-89-17.dta", clear
	collapse (sum) imports, by( naics year)
	drop if naics == "" | naics == "."
	tempfile imports
	save `imports.dta', replace
restore

preserve
	use "$Schott/us_cntry_exports_commodity-89-17.dta", clear
	collapse (sum) exports, by( naics year)
	drop if naics == "" | naics == "."
	tempfile exports
	save `exports.dta', replace
restore

use `imports.dta', clear
merge 1:1 naics year using `exports.dta'
drop _merge

* keep only NAICS 311*** - 339*** industries
rename naics naics_str
destring naics_str, gen(naics) force
gen naics_3digit = substr(naics_str, 1, 3)
destring naics_3digit, replace
tab naics_3digit
keep if naics_3digit >= 300 & naics_3digit<= 400

* deal with some NAICS5-NAICS6 level observations
* 31511X : keep only 31511X and drop other NAICS6 level industries
drop if naics_str == "315119" | naics_str == "315111"

* 32531X: 32531X = 325311 + 325312 + 325314. We have 325311 and 325312 for all the years.
drop if naics_str == "32531X"

* 33631X: keep only 31511X and drop other NAICS6 level industries
drop if naics_str == "336311"

* 33641X: we already have all the NAICS6 level observations within 33641.
drop if naics_str == "33641X"

* save
drop naics
order naics_str naics_3digit year imports exports
label var naics_str "89-01: 1997code; 02-06: 2002code; 07-12: 2007code; 03-16: 2012code; 17: 17code"

save "$Schott/trade_naics-89-17.dta", replace


* 2. generate naics-year-country level dataset *******************************

preserve
	use "$Schott/us_cntry_imports_commodity-89-17.dta", clear
	
	* keep only NAICS 311*** - 339*** industries
	rename naics naics_str
	destring naics_str, gen(naics) force
	gen naics_3digit = substr(naics_str, 1, 3)
	destring naics_3digit, replace
	tab naics_3digit
	keep if naics_3digit >= 300 & naics_3digit<= 400
	drop naics
	*
	drop if isocode_uniform == "Unknown"
	
	* collapse to NAICS6 level
	collapse (sum) imports, by(isocode_uniform region region_wb naics_3digit naics_str year)
	drop if naics_str == "" | naics_str == "."
	
	order naics_str naics_3digit year
	sort naics_str year isocode_uniform region_wb region
	replace region_wb = "North America" if region == "North America" 
	replace region_wb = "Unknown" if region_wb == "" 

	* adjust measurement unit
	replace imports = imports/1000000000
	label var imports "imports [$ b]"

	label var naics_str "89-01: 1997code; 02-06: 2002code; 07-12: 2007code; 13-16: 2012code; 17: 17code"
	save "$Schott/us_imports_naics-cntry-89-17.dta", replace
restore

preserve
	use "$Schott/us_cntry_exports_commodity-89-17.dta", clear
	
	* keep only NAICS 311*** - 339*** industries
	rename naics naics_str
	destring naics_str, gen(naics) force
	gen naics_3digit = substr(naics_str, 1, 3)
	destring naics_3digit, replace
	tab naics_3digit
	keep if naics_3digit >= 300 & naics_3digit<= 400
	drop naics
	
	*
	drop if isocode_uniform == "Unknown"
	
	* collapse to NAICS6 level
	collapse (sum) exports, by(isocode_uniform region region_wb naics_str naics_3digit year)
	drop if naics_str == "" | naics_str == "."
	

	order naics_str naics_3digit year
	sort naics_str year isocode_uniform region_wb region
	replace region_wb = "North America" if region == "North America" 
	replace region_wb = "Unknown" if region_wb == "" 

	* adjust measurement unit
	replace exports = exports/1000000000
	label var exports "exports [$ b]"

	label var naics_str "89-01: 1997code; 02-06: 2002code; 07-12: 2007code; 13-16: 2012code; 17: 17code"
	save "$Schott/us_exports_naics-cntry-89-17.dta", replace
restore
